
################################################################################
# Data processing for Exploratory (EA) and Confirmatory Analysis (CA)
################################################################################

# 1. Data preparation ----------------------------------------------------------

# Read the two split-half workspaces into the current environment.
# The code below expects them to provide `data_interest` (split half 1, used
# for the exploratory IRT analysis) and `data_interest_split2` (split half 2,
# used for the confirmatory IRT analysis).
load(file = "data_interest.RData")
load(file = "data_interest_split2.RData")

# Inspect repeated ids (diagnostic only -- nothing is removed at this point;
# the actual de-duplication happens in the next step).
# Every id that occurs more than once is listed, so participants with three or
# more submissions are reported as well, not only those with exactly two.
# Repeated ids, EA:
data_interest %>%
  group_by(id) %>%
  summarise(N_response = n()) %>%
  filter(N_response > 1)
# Repeated ids, CA:
data_interest_split2 %>%
  group_by(id) %>%
  summarise(N_response = n()) %>%
  filter(N_response > 1)

# De-duplicate participants: rows without an id are discarded and, for every
# remaining id, only its first row is retained.
# NOTE(review): "first" means first in the row order of the loaded data; no
# explicit sort (e.g. by submission time) is applied here -- confirm that the
# files are already ordered as intended.

# Split half 1 -> IrtEa (exploratory analysis)
IrtEa <- data_interest %>%
  filter(!is.na(id)) %>%
  group_by(id) %>%
  slice_head(n = 1) %>%
  ungroup()
rm(list = "data_interest") # the raw object is no longer needed

# Split half 2 -> IrtCa (confirmatory analysis)
IrtCa <- data_interest_split2 %>%
  filter(!is.na(id)) %>%
  group_by(id) %>%
  slice_head(n = 1) %>%
  ungroup()
rm(list = "data_interest_split2") # the raw object is no longer needed

# 2. Data frames for Interested participants -----------------------------------
# 2.1. Data frame with only positive scale items -------------------------------
# For each split half: keep the columns whose name contains "int_pos_id_YES_"
# and relabel items 1-8 as "Item 1" ... "Item 8".

# Exploratory analysis
EA_yes_pos <- IrtEa[grepl("int_pos_id_YES_", names(IrtEa))] %>%
  rename(all_of(setNames(
    paste0("int_pos_id_YES_", 1:8), # current column names
    paste("Item", 1:8)              # new labels
  )))
# Unlike the confirmatory frame below, rows with missing answers are NOT
# dropped here; the corresponding steps are commented out in this script:
#EA_yes_pos<- na.omit(EA_yes_pos) # remove those who did not see the questions because did not get an interest id question or they answered 1-2 on standard political interest
#yes_pos<-rbind(EA_yes_pos, CA_yes_pos)
# NOTE(review): every other item frame in this file applies na.omit() --
# confirm that keeping incomplete rows in EA_yes_pos is intended.

# Confirmatory analysis
CA_yes_pos <- IrtCa[grepl("int_pos_id_YES_", names(IrtCa))] %>%
  rename(all_of(setNames(
    paste0("int_pos_id_YES_", 1:8),
    paste("Item", 1:8)
  ))) %>%
  na.omit() # complete cases only

# 2.2. Data frame with only negative scale items --------------------------------
# For each split half: keep the columns whose name contains "int_neg_id_YES_",
# relabel items 1-8 as "Item 1" ... "Item 8", and drop every row that has a
# missing value in any of the kept columns.

# Exploratory analysis
EA_yes_neg <- IrtEa[grepl("int_neg_id_YES_", names(IrtEa))] %>%
  rename(all_of(setNames(
    paste0("int_neg_id_YES_", 1:8), # current column names
    paste("Item", 1:8)              # new labels
  ))) %>%
  na.omit() # complete cases only

# Confirmatory analysis
CA_yes_neg <- IrtCa[grepl("int_neg_id_YES_", names(IrtCa))] %>%
  rename(all_of(setNames(
    paste0("int_neg_id_YES_", 1:8),
    paste("Item", 1:8)
  ))) %>%
  na.omit() # complete cases only

# 3. Data frames for Uninterested participants ---------------------------------
# 3.1. Data frame with only positive scale items -------------------------------
# For each split half: keep the columns whose name contains "int_pos_id_NO_",
# relabel items 1-8 as "Item 1" ... "Item 8", and drop every row that has a
# missing value in any of the kept columns (presumably respondents who were
# not shown this item set -- verify against the survey routing).

# Exploratory analysis
EA_no_pos <- IrtEa[grepl("int_pos_id_NO_", names(IrtEa))] %>%
  rename(all_of(setNames(
    paste0("int_pos_id_NO_", 1:8), # current column names
    paste("Item", 1:8)             # new labels
  ))) %>%
  na.omit() # complete cases only

# Confirmatory analysis
CA_no_pos <- IrtCa[grepl("int_pos_id_NO_", names(IrtCa))] %>%
  rename(all_of(setNames(
    paste0("int_pos_id_NO_", 1:8),
    paste("Item", 1:8)
  ))) %>%
  na.omit() # complete cases only

# 3.2. Data frame with only negative scale items -------------------------------
# For each split half: keep the columns whose name contains "int_neg_id_NO_",
# relabel items 1-8 as "Item 1" ... "Item 8", and drop every row that has a
# missing value in any of the kept columns.

# Exploratory analysis
EA_no_neg <- IrtEa[grepl("int_neg_id_NO_", names(IrtEa))] %>%
  rename(all_of(setNames(
    paste0("int_neg_id_NO_", 1:8), # current column names
    paste("Item", 1:8)             # new labels
  ))) %>%
  na.omit() # complete cases only

# Confirmatory analysis
CA_no_neg <- IrtCa[grepl("int_neg_id_NO_", names(IrtCa))] %>%
  rename(all_of(setNames(
    paste0("int_neg_id_NO_", 1:8),
    paste("Item", 1:8)
  ))) %>%
  na.omit() # complete cases only

# 4. Compute average interest identity scores ----------------------------------

# Add sum and mean scores for the positive and the negative identity items.
#
# For each valence ("pos", "neg"), every column whose name starts with
# "int_<valence>_id" is summed row-wise, ignoring missing answers, and stored
# as <valence>_id_sum; <valence>_id_mean is that sum divided by `n_items`.
# Respondents who answered none of the items of a valence get NA for both
# scores. This is decided by counting the non-missing answers rather than by
# testing `sum == 0`, so a genuine total of 0 is never mistaken for "no data".
#
# data:    data frame containing the int_pos_id* / int_neg_id* item columns.
# n_items: denominator of the mean, i.e. the number of items per scale.
# returns: `data` with the columns pos_id_sum, pos_id_mean, neg_id_sum and
#          neg_id_mean added (in that order).
#
# NOTE(review): the mean always divides by `n_items`, so a respondent who
# skipped some items gets a lower mean than their answered items imply --
# confirm that partial responses cannot occur, or divide by the number of
# answered items instead.
add_identity_scores <- function(data, n_items = 8) {
  for (valence in c("pos", "neg")) {
    prefix <- paste0("int_", valence, "_id")
    items <- dplyr::select(data, dplyr::starts_with(prefix))
    n_answered <- rowSums(!is.na(items))
    total <- rowSums(items, na.rm = TRUE)
    total[n_answered == 0] <- NA # no answers at all -> score is missing
    data[[paste0(valence, "_id_sum")]] <- total
    data[[paste0(valence, "_id_mean")]] <- total / n_items
  }
  data
}

# Exploratory analysis
IrtEa <- add_identity_scores(IrtEa)

# Confirmatory analysis
IrtCa <- add_identity_scores(IrtCa)




